1 Foreword

Until this vignette, we only used gene features to identify and analyze methylation. In this one, we propose to use methylation features, i.e. CGI-related features, to extract the methylation matrix.

In the first part, we present a per-gene visualisation tool; in the second, a database-wide visualisation through the heatmap pipeline, as in the previous vignettes.

Notably, a table named cgi_pf is used, of dimension 27k * 5, processed by hand from the TCGA database and giving the coordinates, width and center of each CpG island (CGI) indexed on the epic database. A table named cgi_coordinates is also used; it gives the closest CGI coordinates for each probe, extracted and processed from the TCGA database.

2 CGI based gene visualisation

# Gene set under study in this section: the superup deregulated genes.
targeted_genes <- penda_superup_deregulated

# Build the features for these genes from the trscr_lusc study, with
# up_str = dwn_str = 7500 (presumably bases around each gene — TODO confirm).
features <- get_features(
  targeted_genes,
  study = trscr_lusc,
  up_str = 7500,
  dwn_str = 7500
)


# Load the CGI-indexed probes, computing and caching them on first use.
# The result is stored as an RDS file so that get_cgis_probes() does not have
# to be re-run in later sessions. Nothing is done when cgi_indexed_probes
# already exists in the current session.
if (!exists("cgi_indexed_probes")) {
  cgi_probes_path <- "~/projects/supergenes/data/RDS/cgi_indexed_probes"
  if (file.exists(cgi_probes_path)) {
    cgi_indexed_probes <- readRDS(cgi_probes_path)
  } else {
    cgi_indexed_probes <- get_cgis_probes()
    # Make sure the cache directory exists, otherwise saveRDS() errors out
    # after the computation has already been done.
    dir.create(dirname(cgi_probes_path), recursive = TRUE, showWarnings = FALSE)
    saveRDS(cgi_indexed_probes, cgi_probes_path)
  }
}

# Closest-CGI index for the superup genes. The helper is called without
# arguments; presumably it relies on the objects assigned above (features,
# cgi_indexed_probes) — TODO confirm against its definition.
feat_closest_cgi <- get_genes_closest_cgis()

First, we define a function, plot_gene_cgi, which shows the CGIs and probes in a given window (fixed here at [100k,100k]).

# Per-gene CGI/probe plots for three example genes. The calls are kept as
# separate top-level expressions so that each result is printed on its own.
plot_gene_cgi("FKBP4")

plot_gene_cgi("OTX1")

plot_gene_cgi("CDT1")

Note that probes are sorted by coordinates. Thus, the heatmap is readable from left to right.

3 Wide database plots

3.1 Superup genes

3.1.1 CGI binary matrix

The following plot is a visualisation of the CGIs around the TSS. For each plot, a parametrized window surrounding the TSS is split into parametrizable bins, and each bin gets a 1 if there is a CGI on it, and 0 otherwise.

# CGI binary matrix for the superup genes, with window = c(5000, 5000)
# (presumably the extent on each side of the TSS — TODO confirm).
get_cgi_mat(window=c(5000,5000))

3.1.2 Healthy tissues

# CGI-relative bin labels, reused by every heatmap chunk of this vignette.
binlist <- c("N_Shore", "opensea", "S_Shore", "Island", "S_Shelf", "N_Shelf")

# Map built from the superup closest-CGI index and the bin labels.
closest_cgi_map <- reduce_map(feat_closest_cgi, binlist)

# Healthy tissues, mean: reduce meth_normal with `mean` over the map, then
# summarise per bin with `mean` and plot the result.
data_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = data_reduced_healthy,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(means_per_cgi_healthy, main = "mean of means superup/healthy tissues")

# Healthy tissues, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = sd_reduced_healthy,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(sd_per_cgi_healthy, main = "mean of sd superup/healthy tissues")

3.1.3 Tumoral tissues

# Tumoral tissues, mean: reduce meth_tumoral with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
means_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = means_reduced_tumoral,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(means_per_cgi_tumoral, main = "mean of means superup/tumoral tissues")

# Tumoral tissues, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = sd_reduced_tumoral,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(sd_per_cgi_tumoral, main = "mean of sd superup/tumoral tissues")

3.1.4 Differential values

# Differential values, mean: reduce meth_diff with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
means_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = means_reduced_differential,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(means_per_cgi_differential, main = "mean of means superup/differential values")

# Differential values, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = sd_reduced_differential,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi
)
meth_heatmap(sd_per_cgi_differential, main = "mean of sd superup/differential values")

3.2 Superdown genes

# Switch the gene set under study to the superdown deregulated genes and
# rebuild the features with the same get_features() settings as before.
targeted_genes <- penda_superdown_deregulated
features <- get_features(
  targeted_genes,
  study = trscr_lusc,
  up_str = 7500,
  dwn_str = 7500
)

# Closest-CGI index for the superdown genes. The helper is called without
# arguments; presumably it relies on the objects assigned above — TODO confirm.
feat_closest_cgi_down <- get_genes_closest_cgis()

# CGI binary matrix for the superdown genes.
get_cgi_mat(window = c(5000, 5000))

3.2.1 Healthy tissues

# Map built from the superdown closest-CGI index and the shared bin labels
# (binlist is assigned in the superup section and holds the same six labels
# that were previously repeated here as a literal).
closest_cgi_map <- reduce_map(feat_closest_cgi_down, binlist)

# Healthy tissues, mean: reduce meth_normal with `mean` over the map, then
# summarise per bin with `mean` and plot the result.
data_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = data_reduced_healthy,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(means_per_cgi_healthy, main = "mean of means superdown/healthy tissues")

# Healthy tissues, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = sd_reduced_healthy,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(sd_per_cgi_healthy, main = "mean of sd superdown/healthy tissues")

3.2.2 Tumoral tissues

# Tumoral tissues, mean: reduce meth_tumoral with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
means_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = means_reduced_tumoral,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(means_per_cgi_tumoral, main = "mean of means superdown/tumoral tissues")

# Tumoral tissues, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = sd_reduced_tumoral,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(sd_per_cgi_tumoral, main = "mean of sd superdown/tumoral tissues")

3.2.3 Differential values

# Differential values, mean: reduce meth_diff with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
means_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = means_reduced_differential,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(means_per_cgi_differential, main = "mean of means superdown/differential values")

# Differential values, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = sd_reduced_differential,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_down
)
meth_heatmap(sd_per_cgi_differential, main = "mean of sd superdown/differential values")

3.3 Supercons genes

# Switch the gene set under study to the superconserved genes and rebuild the
# features with the same get_features() settings as in the other sections.
targeted_genes <- penda_superconserved
features <- get_features(
  targeted_genes,
  study = trscr_lusc,
  up_str = 7500,
  dwn_str = 7500
)

# Closest-CGI index for the superconserved genes. The helper is called without
# arguments; presumably it relies on the objects assigned above — TODO confirm.
feat_closest_cgi_cons <- get_genes_closest_cgis()

# Map built from that index and the shared bin labels (binlist is assigned in
# the superup section and holds the same six labels that were previously
# repeated here as a literal).
closest_cgi_map <- reduce_map(feat_closest_cgi_cons, binlist)

# CGI binary matrix for the superconserved genes.
get_cgi_mat(window = c(5000, 5000))

3.3.1 Healthy tissues

# Healthy tissues, mean: reduce meth_normal with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
data_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = data_reduced_healthy,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(means_per_cgi_healthy, main = "mean of means supercons/healthy tissues")

# Healthy tissues, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_healthy <- reduce_rows(
  meth_normal, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_healthy <- subset_vals_per_bins(
  data = meth_normal,
  values_per_patient = sd_reduced_healthy,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(sd_per_cgi_healthy, main = "mean of sd supercons/healthy tissues")

3.3.2 Tumoral tissues

# Tumoral tissues, mean: reduce meth_tumoral with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
means_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = means_reduced_tumoral,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(means_per_cgi_tumoral, main = "mean of means supercons/tumoral tissues")

# Tumoral tissues, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_tumoral <- reduce_rows(
  meth_tumoral, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_tumoral <- subset_vals_per_bins(
  data = meth_tumoral,
  values_per_patient = sd_reduced_tumoral,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(sd_per_cgi_tumoral, main = "mean of sd supercons/tumoral tissues")

3.3.3 Differential values

# Differential values, mean: reduce meth_diff with `mean` over closest_cgi_map,
# then summarise per bin with `mean` and plot the result.
means_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, mean,
  na.rm = TRUE
)
means_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = means_reduced_differential,
  fun = mean,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(means_per_cgi_differential, main = "mean of means supercons/differential values")

# Differential values, standard deviation: same pipeline with `sd`.
# NOTE(review): the title says "mean of sd" while fun = sd is passed below —
# confirm which of the two is intended.
sd_reduced_differential <- reduce_rows(
  meth_diff, closest_cgi_map, sd,
  na.rm = TRUE
)
sd_per_cgi_differential <- subset_vals_per_bins(
  data = meth_diff,
  values_per_patient = sd_reduced_differential,
  fun = sd,
  binlist = binlist,
  probes_index = feat_closest_cgi_cons
)
meth_heatmap(sd_per_cgi_differential, main = "mean of sd supercons/differential values")